*******************************************
*** REPLICATION FOR HIGUCHI ET AL. 2024 ***
*** 10 March 2024                       ***
*** Prepared by Yuki Higuchi            ***
*** higuchi@sophia.ac.jp                ***      
*******************************************

use "higuchi_et_al.dta", clear

///*** Defining and labelling control variables ***/// 
gen male = (Gender ==1)
gen bengali = (Tribe ==0)
gen muslim = (Religion ==1)
replace Schooling = 0 if Schooling == 999
gen income_5_pc = (income_5 / f_equiv) 
gen ln_income_5_pc = ln(income_5_pc + (((income_5_pc)^2 + 1)^0.5))
gen migrant = (C0 != 1)
gen identity = (D4 == 4 | D4 == 5)
gen equality = (D3 == 2)

label var male "=1 if male"
label var bengali "=1 if Bengali"
label var muslim "=1 if Muslim"
label var migrant "=1 if migrated in the past 5 years"
label var ln_income_5_pc "Past per capita income (IHS transformed)"
label var identity "=1 if feel belonged to a religion"
label var equality "=1 if averse to inequality"
label var D2_6 "Distrust in a foreigner"

global x1 "male bengali muslim Schooling Age migrant f_n ln_income_5_pc non_head"
global x  "male bengali muslim Schooling Age migrant f_n ln_income_5_pc non_head identity equality D2_6"

///*** Defining and labelling outcome variables ***/// 
gen JOD = D5 * 20 - 20 
gen JOD_any = (JOD != 0)
gen JOD_R = D7 * 20 - 20 
for num 4 5 7: gen CX_1d   = (CX_1 == 5)

label var JOD "JOD payment (BDT)"
label var JOD_any "JOD (=1 if paid non-zero)"
label var JOD_R "Hypothetical JOD payment by Rohingya (BDT)"
label var D2_7 "Distrust in Rohingya refugees"
label var C4_1d "HH income declined (=1 if yes)"
label var C5_1d "Commodity price increased (=1 if yes)"
label var C7_1d "Level of crime increased (=1 if yes)"
label var change_5_d "Forest degraded (=1 if yes)"

foreach x in JOD D2_7 JOD_R C2 C3 {
egen    `x'_mean = mean(`x') if dist_ukhia !=. 
replace `x'      = `x'_mean  if dist_ukhia !=. & `x' == . | dist_ukhia !=. & `x' == .a
egen    `x'_std  = std(`x')  if dist_ukhia !=.
}
pca D2_7_std JOD_R_std C2_std C3_std if dist_ukhia!=.
predict pc_opinion, score
label var pc_opinion "Opinion index (PCA)"

pca C4_1d C5_1d C7_1d change_5_d if dist_ukhia!=.
predict pc_damage, score
label var pc_damage "Damage index (PCA)"

global y "JOD JOD_any D2_7 JOD_R C2 C3 pc_opinion pc_damage"

///*** Defining and labelling variables used for heterogeneous analysis ***///
gen near_300 = (dist_road_highway < 0.300)
replace Water_5 = Water if Water_change == 1
gen Water_d   = (Water ==1    | Water ==7    | Water ==8)
gen Water_5_d = (Water_5 == 1 | Water_5 == 7 | Water_5 ==8)
gen Water_improve = (Water_5_d == 0 & Water_d == 1)
replace Hospital_5 = Hospital if Hospital_change == 1 
gen Hospital_improve_s = (Hospital_5 - Hospital) / Hospital_5
gen Hospital_improve = (Hospital_improve >= 0.2)

label var near_300 "=1 if located within 300 m from the newly paved road"
label var Water_improve "=1 if water access improved in the past 5 years"
label var Hospital_improve "=1 if health care access improved in the past 5 years"

///*** Analysis ***///

/* Table 1, columns 1 & 2 */
tabstat dist_ukhia $x if dist_ukhia!=., s(N mean sd)

eststo clear
foreach x in $x {
eststo: areg `x' dist_ukhia i.enumerator i.phase, a(upazila) cluster(village_id)
}
esttab using temp.csv, b(a2) se(a2) r2 star star(* 0.10 ** 0.05 *** 0.01)  replace

reg dist_ukhia $x1 i.upazila i.enumerator i.phase, cluster(village_id)
test male = bengali  = muslim  = Schooling  = Age  = migrant  = f_n  = ln_income_5_pc  = non_head  = 0
reg dist_ukhia $x  i.upazila i.enumerator i.phase, cluster(village_id)
test male = bengali  = muslim  = Schooling  = Age  = migrant  = f_n  = ln_income_5_pc  = non_head = identity = equality = D2_6 = 0

/* Table 1, columns 3 & 4 */
tabstat dist_ukhia $x if dist_ukhia!=. & group == 1, s(N mean sd)

eststo clear
foreach x in $x {
eststo: areg `x' dist_ukhia i.enumerator i.phase if group == 1, a(upazila) cluster(village_id)
}
esttab using temp.csv, b(a2) se(a2) r2 star star(* 0.10 ** 0.05 *** 0.01)  replace

reg dist_ukhia $x1 i.upazila i.enumerator i.phase if group == 1, cluster(village_id)
test male = bengali  = muslim  = Schooling  = Age  = migrant  = f_n  = ln_income_5_pc  = non_head  = 0
reg dist_ukhia $x  i.upazila i.enumerator i.phase if group == 1, cluster(village_id)
test male = bengali  = muslim  = Schooling  = Age  = migrant  = f_n  = ln_income_5_pc  = non_head = identity = equality = D2_6 = 0

/* Table 2*/
tabulate JOD if dist_ukhia!=. 
tabstat  JOD D2_7 JOD_R C2 C3 pc_opinion C4_1d C5_1d C7_1d change_5_d pc_damage if dist_ukhia!=., s(N mean sd)

tabulate JOD if dist_ukhia!=. & group == 1
tabstat  JOD D2_7 JOD_R C2 C3 pc_opinion C4_1d C5_1d C7_1d change_5_d pc_damage if dist_ukhia!=. & group == 1, s(N mean sd)

/* Appendix Table A2*/
tabstat dist_ukhia $x if dist_ukhia!=., by(group) s(N mean sd)

/* Appendix Table A3*/
tabulate JOD group if dist_ukhia!=. , col
tabstat  JOD D2_7 JOD_R C2 C3 pc_opinion C4_1d C5_1d C7_1d change_5_d pc_damage if dist_ukhia!=., by(group) s(N mean sd)
	   
/* Table 3*/
eststo clear 
foreach y in JOD JOD_any D2_7 JOD_R C2 C3 pc_opinion pc_damage {
eststo: areg `y' dist_ukhia     i.enumerator i.phase, a(upazila) cluster(village_id)
}
foreach y in JOD JOD_any D2_7 JOD_R C2 C3 pc_opinion pc_damage {
eststo: areg `y' dist_ukhia $x1 i.enumerator i.phase, a(upazila) cluster(village_id)
}
foreach y in JOD JOD_any D2_7 JOD_R C2 C3 pc_opinion pc_damage {
eststo: areg `y' dist_ukhia $x  i.enumerator i.phase, a(upazila) cluster(village_id)
}
esttab using temp.csv, b(a2) se(a2) r2 star star(* 0.10 ** 0.05 *** 0.01)  replace

tabstat JOD JOD_any D2_7 JOD_R C2 C3 pc_opinion pc_damage if dist_ukhia != ., s(N mean sd) 

esttab using anderson.csv, b(a2) p(a5) star star(* 0.10 ** 0.05 *** 0.01) noparen replace

* Anderson's q-values are obtained as follows. 
* (1) Visit https://are.berkeley.edu/~mlanderson/ARE_Website/Research.html
* (2) Download "Stata code to compute "sharpened" False Discovery Rate (FDR) adjusted q-values"
* (3) Open the do-file and run 
* (4) Copy and paste values in "anderson.csv" as instructed by the do-file

/* Figure 2*/
sort upazila 
by upazila: egen dist_ukhia_mean = mean(dist_ukhia)
gen dist_ukhia_demean = dist_ukhia -  dist_ukhia_mean

binscatter JOD dist_ukhia_demean, ///
title("Paid amount in JOD (BDT)") ytitle("") xtitle("Demeaned distance to the camp (km)", size(large)) ///
ylabel(10(10)40) xlabel (-20(20)20) scheme(tab3) name(graph1)

binscatter pc_opinion dist_ukhia_demean, ///
title("Nagative opinion index") ytitle("") xtitle("Demeaned distance to the camp (km)", size(large)) ///
ylabel(-1(0.5)1) xlabel (-20(20)20) scheme(tab3) name(graph2)

graph combine graph1 graph2

/* Figure 3*/
egen  dist_std = std(dist_ukhia) if dist_ukhia !=.
label variable dist_std "Standardized distance from the camp"
egen  pc_opinion_std = std(pc_opinion) if dist_ukhia != .

foreach x in $x {
egen `x'_std = std(`x') if dist_ukhia != . 
}

global x_std "male_std bengali_std muslim_std Schooling_std Age_std migrant_std f_n_std ln_income_5_pc_std non_head_std identity_std equality_std D2_6_std"

areg JOD_std        dist_std $x_std i.enumerator i.phase, a(upazila) cluster(village_id)
estimates store JOD
areg pc_opinion_std dist_std $x_std i.enumerator i.phase, a(upazila) cluster(village_id)
estimates store index

coefplot (JOD, label(Paid amount in JOD (BDT))) (index, label(Negative opinion index)), keep(dist_std $x_std) xline(0) xlabel(-1 (0.25) 1, nogrid labsize(small)) ///
xtitle("Standardized coefficient of covariates", size(medium)) ///
levels(95) scheme(tab3)  legend(size(medium)) ///
coeflabels(dist_std           = "Distance to the camp (km)" ///
           male_std           = "=1 if male"   ///
           bengali_std        = "=1 if Bengali"  ///
		   muslim_std         = "=1 if Muslim" ///
		   Schooling_std      = "Competed years of schooling" ///
		   Age_std            = "Age" ///
		   migrant_std        = "=1 if migrated" ///
		   f_n_std            = "Number of HH members" ///
		   ln_income_5_pc_std = "Past per capita income (IHS-transformed)" ///
		   non_head_std       = "=1 if non-HH head responded" ///
		   identity_std       = "= 1 if feel belonged to a religion" ///
		   equality_std       = "=1 if averse to inequality" ///
		   D2_6_std           = "Distrust in a foreigner" )

/* Table 4*/
eststo clear 
foreach y in JOD JOD_any D2_7 JOD_R C2 C3 pc_opinion pc_damage {
eststo: areg `y' dist_ukhia     i.enumerator i.phase if group ==1, a(upazila) cluster(village_id)
}
foreach y in JOD JOD_any D2_7 JOD_R C2 C3 pc_opinion pc_damage {
eststo: areg `y' dist_ukhia $x1 i.enumerator i.phas if group ==1, a(upazila) cluster(village_id)
}
foreach y in JOD JOD_any D2_7 JOD_R C2 C3 pc_opinion pc_damage {
eststo: areg `y' dist_ukhia $x  i.enumerator i.phase if group ==1, a(upazila) cluster(village_id)
}
esttab using temp.csv, b(a2) se(a2) r2 star star(* 0.10 ** 0.05 *** 0.01)  replace

tabstat JOD JOD_any D2_7 JOD_R C2 C3 pc_opinion pc_damage if dist_ukhia != . & group ==1, s(N mean sd) 

esttab using anderson2.csv, b(a2) p(a5) star star(* 0.10 ** 0.05 *** 0.01) noparen replace
* This is the input for computation of Anderson's q-values 

/* Table 5*/
foreach hetero in near_300 Water_improve Hospital_improve gained_occup {
gen d_`hetero' = dist_ukhia * `hetero'
}

foreach hetero in near_300 gained_occup Water_improve Hospital_improve {
eststo: areg JOD dist_ukhia `hetero' d_`hetero' $x    i.enumerator i.phase , a(upazila) cluster(village_id)
}

foreach hetero in near_300 gained_occup Water_improve Hospital_improve {
eststo: areg JOD dist_ukhia `hetero' d_`hetero' $x    i.enumerator i.phase if group == 1, a(upazila) cluster(village_id)
}
esttab using temp.csv, b(a2) se(a2) r2 star star(* 0.10 ** 0.05 *** 0.01)  replace

esttab using anderson3.csv, b(a2) p(a5) star star(* 0.10 ** 0.05 *** 0.01) noparen replace
* This is the input for the computation of Anderson's q-values 

/* Figure 4*/
forvalues size = 0(1)10{
preserve
drop if dist_original_camp < `size'
eststo: areg JOD dist_ukhia $x i.enumerator i.phase, a(upazila) cluster(village_id) 
estimates store drop`size'
restore
}

#delimit ;
coefplot drop0 drop1 drop2 drop3 drop4 drop5 drop6 drop7 drop8 drop9 drop10,
keep(dist_ukhia) vertical yline(0) legend(off) levels (95) replace mcolor(black)
ciopts(lcolor(black) recast(rcap)) gen(_c) ;
#delimit cr

keep _c*
cap drop bw
gen bw = . //identify to which bandwidth size the estimates belong
replace bw = _cplot - 1

// Plot point estimates and CI against bandwidth size
#delimit ;
scatter _cb bw, mc(black) || rcap _cul1 _cll1 bw, lc(black) ||,
yline(0, lp(dash)) xtitle("Dropped if located within X km of the original camps", margin(small))
ytitle("Coefficiant (95% CI)", margin(small)) legend(off)
xlabel(0(2)10) ylabel(-1(0.2)0) 
title("Donut hole analysis") 
scheme(tab3);
#delimit cr